使用go.Pie()函数可以绘制饼图。Pie函数中最重要的两个属性是:labels表示标签(一般是离散型变量),values表示该标签对应的数值。
其余样式设置与前面学习的函数基本类似,常用参数如下:
# 4-1 Basic Pie Chart
import plotly.graph_objects as go

# Sector names and the amount each sector represents (same order in both lists).
labels = ['Oxygen', 'Hydrogen', 'Carbon_Dioxide', 'Nitrogen']
values = [4500, 2500, 1053, 500]

# Start from an empty figure and attach a single pie trace:
#   labels -> the sector labels
#   values -> the number behind each label
# Optional: hole=0.4 sets the inner radius (range 0-1, default 0) and turns
# the pie into a donut chart.
fig = go.Figure()
fig.add_trace(go.Pie(labels=labels, values=values))
fig.show()
# 4-2 Styled Pie Chart
import plotly.graph_objects as go

# Fill colours for the sectors.
colors = ['gold', 'mediumturquoise', 'darkorange', 'lightgreen']

fig = go.Figure()
fig.add_trace(go.Pie(
    # Sector labels and their numbers (the `labels` / `values` lists of example 4-1).
    labels=labels,
    values=values,
    # marker: sector appearance - fill colours plus the outline drawn around each sector.
    marker={'colors': colors, 'line': {'color': '#000000', 'width': 2}},
    # opacity: how opaque the trace is.
    opacity=0.6,
    # hoverinfo: what the hover tooltip shows (label / value / percent).
    hoverinfo='label+percent',
    # textinfo: what is written on the sectors (label / value / percent).
    textinfo='value+percent',
    textfont={'size': 15, 'color': 'black'},
    textposition='auto',
    # pull: how far each sector is pulled out of the pie (0-1, default 0).
    pull=[0, 0.1, 0, 0],
    # rotation: rotation angle of the sectors.
    rotation=30,
    # direction: 'clockwise' or 'counterclockwise' (the default).
    direction='clockwise',
))
fig.show()
导入数据文件'Sample - Superstore.xls',绘制饼图,展示每个地区(Region)销售额(Sales)总和的占比情况。
# Data preparation: total Sales for each Region.
import pandas as pd

df = pd.read_excel('Sample - Superstore.xls', sheet_name='Orders')
# Group the order rows by Region and add up the Sales column of each group.
data = df.groupby(by='Region')['Sales'].agg('sum')
data
Region Central 501239.8908 East 678781.2400 South 391721.9050 West 725457.8245 Name: Sales, dtype: float64
# Draw the pie chart; every sector is annotated with its region name and share.
import plotly.graph_objects as go

# Build a fresh figure from the per-region totals in `data` (a Series indexed
# by Region). Without this, update_layout()/show() would act on whatever
# figure an earlier cell left in `fig`.
fig = go.Figure(data=go.Pie(
    labels=data.index,         # region names
    values=data.values,        # total Sales per region
    textinfo='label+percent',  # show region name and percentage on each sector
))
fig.update_layout(
    title='Sales by Region',
)
fig.show()
导入数据文件'Sample - Superstore.xls',绘制两个饼图,分别展示2015年和2018年每个地区(Region)销售额(Sales)总和的占比情况。
# Data preparation: total Sales per Region for orders placed in 2015 and in 2018.
import pandas as pd

df = pd.read_excel('Sample - Superstore.xls', sheet_name='Orders')
# Keep only the rows whose 'Order Date' falls in the given year, then sum
# Sales within each Region.
data1 = df[df['Order Date'].dt.year == 2015].groupby('Region')['Sales'].sum()
data2 = df[df['Order Date'].dt.year == 2018].groupby('Region')['Sales'].sum()
# Combine the two yearly results on their shared Region index; both are named
# 'Sales', so the suffixes tell the resulting columns apart.
data = pd.merge(
    left=data1,
    right=data2,
    left_index=True,
    right_index=True,
    suffixes=('_2015', '_2018'),
)
data
| Region | Sales_2015 | Sales_2018 |
|---|---|---|
| Central | 103838.1646 | 147098.1282 |
| East | 128680.4570 | 213082.9040 |
| South | 103845.8435 | 122905.8575 |
| West | 147883.0330 | 250128.3655 |
# Multiple pie charts in one figure (Pie Charts in subplots)
# Approach 1: place each Pie with its `domain` parameter
import plotly.graph_objects as go

fig = go.Figure()
# `data` is the Region-indexed table with columns 'Sales_2015' and 'Sales_2018'.
# Each pie gets its own horizontal slice of the figure through `domain`;
# x/y are fractions of the plotting area, so the two pies sit side by side.
fig.add_trace(go.Pie(
    labels=data.index,
    values=data['Sales_2015'],
    name='2015',
    title=dict(text='Sales in 2015'),
    domain=dict(x=[0, 0.48], y=[0, 1]),    # left half
))
fig.add_trace(go.Pie(
    labels=data.index,
    values=data['Sales_2018'],
    name='2018',
    title=dict(text='Sales in 2018'),
    domain=dict(x=[0.52, 1], y=[0, 1]),    # right half
))
fig.update_layout(title = 'Sales by Region')
fig.show()
# Approach 2: create the multi-plot layout with make_subplots
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# A 1 x 2 grid of subplots. Pie traces need cells of type 'domain';
# subplot_titles gives each cell its heading.
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[dict(type='domain'), dict(type='domain')]],
    subplot_titles=['Sales in 2015', 'Sales in 2018'],
)

# row / col select the grid cell each pie is drawn in.
fig.add_trace(go.Pie(labels=data.index, values=data['Sales_2015']), row=1, col=1)
fig.add_trace(go.Pie(labels=data.index, values=data['Sales_2018']), row=1, col=2)

# Shared styling for both pies. Traces that belong to the same scalegroup are
# drawn with areas proportional to their totals.
fig.update_traces(opacity=0.9, hole=0.4, scalegroup='two')
fig.update_layout(title='Sales by Region')
fig.show()
在展示占比情况时,饼图是我们最常用的选择。但是,饼图只能展示单层数据的占比情况;在面对多层级数据时,我们真的要用多个饼图进行可视化吗?其实不必——旭日图(Sunburst Chart)集万千饼图于一身:它是饼图的变形,简单来说就是多个饼图的组合升级版。旭日图不仅可以展示数据的占比情况,还能厘清多层级数据之间的关系。
在旭日图中,一个圆环代表一个层级的分类数据,一个环块所代表的数值可以体现该数据在同层级数据中的占比。一般情况下,内层数据是相邻的外层数据的父类别,最内层圆环的分类级别最高,越往外,分类越细越具体。最基础的旭日图是在树状图的基础上,把树状的层级关系转化为圆环的形式,比树状图更加节约空间。在增加了交互功能之后,旭日图每一层的数据关系能更加清楚地展现出来。
可以使用go.Sunburst()函数来绘制旭日图。该函数中最重要的几个属性如下所示:
# 4-3 Basic Sunburst Plot
import plotly.graph_objects as go

# Parallel lists: entry i describes one node of the hierarchy.
labels = ["Eve", "Cain", "Seth", "Abel", "Awan", "Azura", "Enos", "Noam", "Enoch"]
parents = ["", "Eve", "Eve", "Eve", "Eve", "Eve", "Seth", "Seth", "Awan"]
values = [42, 14, 12, 6, 6, 4, 10, 2, 4]

fig = go.Figure()
fig.add_trace(go.Sunburst(
    labels=labels,    # text on every ring segment (all nodes)
    parents=parents,  # parent label of each node; '' marks the single root here
    values=values,    # number attached to each node (may be omitted)
    # branchvalues controls how segment widths are derived from the numbers:
    #   'total'     - a parent's width is its own value, which must be >= the
    #                 sum of its children
    #   'remainder' - a parent's width is its own value plus its children's
    #                 (the default)
    # so the magnitudes in `values` affect how the chart is drawn.
    branchvalues='total',
))
# Zero margins on every side let the chart fill the available space.
fig.update_layout(margin={'t': 0, 'l': 0, 'r': 0, 'b': 0})
fig.show()
导入数据文件'Sample - Superstore.xls',绘制旭日图,展示商品类别(Category)和商品子类别(Sub-Category)之间的层级关系,环块的宽度体现销售额(Sales)总和。
# Data preparation: total Sales for each Category / Sub-Category pair.
import pandas as pd

df = pd.read_excel('Sample - Superstore.xls', sheet_name='Orders')
# Group by the two columns together and add up Sales inside each group.
data = df.groupby(by=['Category', 'Sub-Category'])['Sales'].agg('sum')
data  # a Series with a two-level MultiIndex
Category Sub-Category
Furniture Bookcases 114879.9963
Chairs 328449.1030
Furnishings 91705.1640
Tables 206965.5320
Office Supplies Appliances 107532.1610
Art 27118.7920
Binders 203412.7330
Envelopes 16476.4020
Fasteners 3024.2800
Labels 12486.3120
Paper 78479.2060
Storage 223843.6080
Supplies 46673.5380
Technology Accessories 167380.3180
Copiers 149528.0300
Machines 189238.6310
Phones 330007.0540
Name: Sales, dtype: float64
# The sunburst has two rings: Category on the inside (root nodes) and
# Sub-Category on the outside (child nodes).
labels = list(data.index.levels[0])   # all node labels; starts with the Category names
# Every Category is a root, i.e. its parent is the empty string. The count is
# taken from `labels` instead of being hard-coded so the two lists stay the
# same length whatever the number of categories is.
parents = [''] * len(labels)
# Iterating `data.index` yields (Category, Sub-Category) pairs: the second
# item is a child label, the first is that child's parent.
labels += [sub_category for _, sub_category in data.index]
parents += [category for category, _ in data.index]
print('labels:',labels)
print('\nparents:',parents)
labels: ['Furniture', 'Office Supplies', 'Technology', 'Bookcases', 'Chairs', 'Furnishings', 'Tables', 'Appliances', 'Art', 'Binders', 'Envelopes', 'Fasteners', 'Labels', 'Paper', 'Storage', 'Supplies', 'Accessories', 'Copiers', 'Machines', 'Phones'] parents: ['', '', '', 'Furniture', 'Furniture', 'Furniture', 'Furniture', 'Office Supplies', 'Office Supplies', 'Office Supplies', 'Office Supplies', 'Office Supplies', 'Office Supplies', 'Office Supplies', 'Office Supplies', 'Office Supplies', 'Technology', 'Technology', 'Technology', 'Technology']
# `values` holds the number for every entry of `labels`: first the Sales total
# of each Category, then the Sales total of each Sub-Category.
values = [*data.groupby(level='Category').sum(), *data.values]
print('values:',values)
values: [741999.7953, 719047.032, 836154.033, 114879.9963, 328449.103, 91705.164, 206965.532, 107532.161, 27118.792, 203412.733, 16476.402000000002, 3024.28, 12486.312, 78479.206, 223843.608, 46673.538, 167380.318, 149528.03, 189238.631, 330007.054]
# Draw the sunburst
import plotly.graph_objects as go

# Build the figure from the `labels` / `parents` / `values` lists prepared
# above; calling fig.show() alone would redisplay a figure left over from an
# earlier cell.
fig = go.Figure(data=go.Sunburst(
    labels=labels,
    parents=parents,
    values=values,
    # Each Category value is already the sum of its Sub-Category values, so a
    # parent's width should be its own value rather than value + children.
    branchvalues='total',
))
fig.show()
在展示占比情况时,饼图是我们最常用的选择。如果要体现多层级数据之间的占比情况,旭日图也提供了很好的可视化。 层级图的功能等同于旭日图,非常适合于显示层级结构,或者更确切地说树状结构的数据,只是展示形式有所不同:
层级图的外部矩形代表父类别,内部矩形代表子类别。可以使用go.Treemap()函数来绘制层级图,其参数与go.Sunburst()函数几乎一致。
# 6-3 Basic Treemap
import plotly.graph_objects as go

# Parallel lists: entry i describes one node of the hierarchy.
labels = ["Eve", "Cain", "Seth", "Abel", "Awan", "Azura", "Enos", "Noam", "Enoch"]
parents = ["", "Eve", "Eve", "Eve", "Eve", "Eve", "Seth", "Seth", "Awan"]
values = [42, 14, 12, 6, 6, 4, 10, 2, 4]

fig = go.Figure()
fig.add_trace(go.Treemap(
    labels=labels,
    parents=parents,
    values=values,
    # 'total':     a parent's area is its own value, which should be >= the
    #              sum of its children's values
    # 'remainder': a parent's area is its own value plus its children's
    #              (the default)
    branchvalues='total',
    # Colour of the root node.
    root_color='lightgrey',
))
fig.show()
为了展示更多信息,可以设置层级图中不同矩形板块的颜色:
- 离散的颜色取值:可以使用marker_colors或者colorway,传递固定的颜色
- 连续的颜色取值:将变量传递给marker_colors,再通过colorscale来设定颜色范围
# 6-4 Set Color of Treemap Sectors
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Two treemaps side by side; each grid cell must be of type 'treemap'.
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[dict(type='treemap'), dict(type='treemap')]],
    subplot_titles=('discrete colors', 'continuous colorscale'),
)

# Left: discrete colours. A fixed colour list could also be passed directly,
# e.g. marker_colors = ['lightgrey']+['lightblue']*5+['pink']*3
fig.add_trace(
    go.Treemap(labels=labels, parents=parents, root_color='lightgrey'),
    row=1, col=1,
)

# Right: continuous colours. A numeric variable goes into marker.colors and
# colorscale defines the colour range it is mapped onto.
fig.add_trace(
    go.Treemap(
        labels=labels,
        parents=parents,
        marker={'colors': values, 'colorscale': 'Blues_r', 'showscale': True},
    ),
    row=1, col=2,
)

# treemapcolorway: colours of the sectors one level below the root; the
# colours of the nodes nested inside them vary as the depth increases.
fig.update_layout(treemapcolorway=['lightblue', 'white', 'lightcyan', 'pink', 'grey'])
fig.show()
导入数据文件'Sample - Superstore.xls',绘制层级图,展示商品类别(Category)和商品子类别(Sub-Category)之间的层级关系,矩形版块的面积大小体现销售额(Sales)总和。
# Data preparation: total Sales for each Category / Sub-Category pair.
import pandas as pd

df = pd.read_excel('Sample - Superstore.xls', sheet_name='Orders')
# Group by the two columns together and add up Sales inside each group.
data = df.groupby(by=['Category', 'Sub-Category'])['Sales'].agg('sum')
data  # a Series with a two-level MultiIndex
Category Sub-Category
Furniture Bookcases 114879.9963
Chairs 328449.1030
Furnishings 91705.1640
Tables 206965.5320
Office Supplies Appliances 107532.1610
Art 27118.7920
Binders 203412.7330
Envelopes 16476.4020
Fasteners 3024.2800
Labels 12486.3120
Paper 78479.2060
Storage 223843.6080
Supplies 46673.5380
Technology Accessories 167380.3180
Copiers 149528.0300
Machines 189238.6310
Phones 330007.0540
Name: Sales, dtype: float64
# The treemap has two levels: the outer rectangles are the parent categories
# (Category), the inner rectangles are the children (Sub-Category).
labels = list(data.index.levels[0])   # all node labels; starts with the Category names
# Every Category is a root, i.e. its parent is the empty string. The count is
# taken from `labels` instead of being hard-coded so the two lists stay the
# same length whatever the number of categories is.
parents = [''] * len(labels)
# Iterating `data.index` yields (Category, Sub-Category) pairs: the second
# item is a child label, the first is that child's parent.
labels += [sub_category for _, sub_category in data.index]
parents += [category for category, _ in data.index]
print('labels:',labels)
print('\nparents:',parents)
labels: ['Furniture', 'Office Supplies', 'Technology', 'Bookcases', 'Chairs', 'Furnishings', 'Tables', 'Appliances', 'Art', 'Binders', 'Envelopes', 'Fasteners', 'Labels', 'Paper', 'Storage', 'Supplies', 'Accessories', 'Copiers', 'Machines', 'Phones'] parents: ['', '', '', 'Furniture', 'Furniture', 'Furniture', 'Furniture', 'Office Supplies', 'Office Supplies', 'Office Supplies', 'Office Supplies', 'Office Supplies', 'Office Supplies', 'Office Supplies', 'Office Supplies', 'Office Supplies', 'Technology', 'Technology', 'Technology', 'Technology']
# `values` holds the number for every entry of `labels`: the Sales totals of
# the Categories concatenated with the Sales totals of the Sub-Categories.
values = [*data.groupby(level='Category').sum(), *data.values]
print('values:',values)
values: [741999.7953, 719047.032, 836154.033, 114879.9963, 328449.103, 91705.164, 206965.532, 107532.161, 27118.792, 203412.733, 16476.402000000002, 3024.28, 12486.312, 78479.206, 223843.608, 46673.538, 167380.318, 149528.03, 189238.631, 330007.054]
# 绘制层级图:将三个不同的Category设置成三种离散的颜色
# 绘制层级图:将每个标签所在板块的颜色随着销量的值深浅变化
导入数据文件'Sample - Superstore.xls',绘制层级图,展示地区(Region)和州(State)之间的层级关系,矩形板块的面积体现销售额(Sales)总和,矩形板块的颜色体现利润(Profit)总和。其中文本显示标签和销售额占全国的百分比,交互时显示数值和在所属地区中的占比。
import pandas as pd
import numpy as np
import plotly.graph_objects as go

# Load the orders sheet here so the cell does not depend on a `df` left over
# from another cell.
df = pd.read_excel('Sample - Superstore.xls', sheet_name='Orders')
# Group by 'Region' / 'State' and total 'Sales' and 'Profit' in each group.
data = df.groupby(['Region','State'])[['Sales','Profit']].sum()
# Region-level totals, used for the parent rectangles.
region_totals = data.groupby(level='Region').sum()

# Node lists: the Region nodes first (roots, parent ''), then one node per
# (Region, State) pair. Explicit ids keep nodes distinct even if a state name
# were to appear under more than one region.
ids = list(region_totals.index) + [f'{region}/{state}' for region, state in data.index]
labels = list(region_totals.index) + [state for _, state in data.index]
parents = [''] * len(region_totals) + [region for region, _ in data.index]
sales = list(region_totals['Sales']) + list(data['Sales'])
profit = list(region_totals['Profit']) + list(data['Profit'])

fig = go.Figure(go.Treemap(
    ids=ids,
    labels=labels,
    parents=parents,
    values=sales,                 # rectangle area = total Sales
    # Region values already equal the sum of their states.
    branchvalues='total',
    marker=dict(
        colors=profit,            # rectangle colour = total Profit
        colorscale='RdBu',
        cmid=0,                   # centre the scale at zero so losses and gains differ in hue
        showscale=True,
        colorbar=dict(title=dict(text='Profit')),
    ),
    # On the rectangles: the label and its share of the overall total.
    textinfo='label+percent root',
    # On hover: the label, the Sales value and the share within the parent region.
    hoverinfo='label+value+percent parent',
))
fig.update_layout(
    title = 'Sales and Profit by Region & States',
    margin = dict(l=0, r=0, b=0)
)
fig.show()